# -*- coding: utf-8 -*-
# @Time : 2023/3/16 0016 16:22
# @Author : 菜鸟王小二
# @File : 21_jsonpath获取淘票票数据
# @Project : python爬虫
import json
import urllib.request
import jsonpath

# JSONP endpoint of the "coming soon" movie list; the query string is the one
# captured from the browser request.
url = (
    'https://dianying.taobao.com/showAction.json'
    '?_ksTS=1678954715666_64'
    '&jsoncallback=jsonp65'
    '&action=showAction'
    '&n_s=new'
    '&event_submit_doGetSoon=true'
)

# Request headers copied from the browser.
# Deliberately left out:
#   * the four pseudo-headers starting with ':' (':authority', ':method',
#     ':path', ':scheme') - sending them raises an error;
#   * 'accept-encoding' ('gzip, deflate, br') - with it present, decoding the
#     response fails with an encoding error.
# NOTE(review): the session cookie is hard-coded here and will presumably
# expire; consider loading it from configuration instead.
headers = {
    'accept': 'text/javascript, application/javascript, application/ecmascript, application/x-ecmascript, */*; q=0.01',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    'bx-v': '2.2.3',
    'cookie': (
        'cna=PPCRG3xSsEUCAXjliOb7UpUv; '
        't=7590d933288a75a452b7794f4eafed72; '
        'cookie2=123bbbede54e9dbb55495d9a2c435c16; '
        'v=0; '
        '_tb_token_=f3ab3e50d3758; '
        'xlly_s=1; '
        'tb_city=441900; '
        'tb_cityName="tqvduA=="; '
        'tfstk=cNBNBuXN8ReZpJjYypvVaMws9U9OwbsGmv-WsrNVGCk6Lhfc5qK3IQKecDuDs; '
        'l=fBQ-1ITrTohTo-rFKOfaFurza77OSIRYYuPzaNbMi9fPO95B51LAW1M2g886C3GVFsHyR3SYy-ZBBeYBqQAonxvTaxom40kmndLHR35..; '
        'isg=BCYmjKpBfqO8YC2Ep4LYhPNud5yoB2rB4Z-gbxDPEskkk8ateJe60Qxl648fO2LZ'
    ),
    'referer': 'https://dianying.taobao.com/?spm=a1z21.3046609.city.76.1891112ajnUs16&city=441900',
    'sec-ch-ua': '"Chromium";v="110", "Not A(Brand";v="24", "Microsoft Edge";v="110"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-origin',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.63',
    'x-requested-with': 'XMLHttpRequest',
}

def fetch_text(target_url, request_headers, encoding='utf-8'):
    """Download *target_url* and return the response body as text.

    Args:
        target_url: Address to request.
        request_headers: Mapping of HTTP header names to values.
        encoding: Codec used to decode the raw response bytes.

    Returns:
        The decoded response body.

    Raises:
        urllib.error.URLError: If the request fails.
        UnicodeDecodeError: If the body is not valid *encoding*.
    """
    req = urllib.request.Request(url=target_url, headers=request_headers)
    # The context manager closes the HTTP response even if read/decode fails.
    with urllib.request.urlopen(req) as resp:
        return resp.read().decode(encoding)


def extract_jsonp_payload(text):
    """Return the JSON text wrapped by a JSONP callback.

    For ``jsonp65({...});`` this returns ``{...}``. Everything between the
    first ``(`` and the last ``)`` is kept verbatim, so parentheses (or the
    sequence ``);``) that occur inside the data are preserved. Splitting on
    ``(`` and re-joining the pieces would silently drop those characters.

    Args:
        text: Raw JSONP response body.

    Returns:
        The substring between the outermost pair of parentheses.

    Raises:
        ValueError: If *text* has no ``(...)`` wrapper.
    """
    start = text.find('(')
    end = text.rfind(')')
    if start == -1 or end < start:
        raise ValueError('response is not a JSONP payload: no "(...)" wrapper found')
    return text[start + 1:end]


if __name__ == '__main__':
    content = fetch_text(url, headers)
    # At this point the data is still a plain string.
    print(type(content))

    # Strip the JSONP callback wrapper, then parse the remaining JSON text.
    str_content = extract_jsonp_payload(content)
    obj = json.loads(str_content)
    print(type(obj))

    # Collect every 'showName' value (the movie titles) at any depth.
    # jsonpath returns False rather than an empty list when nothing matches.
    name_list = jsonpath.jsonpath(obj=obj, expr='$..showName')
    print(name_list)